# -*- coding: utf-8 -*-
import requests
import lxml
from bs4 import BeautifulSoup
import html2text
import re

def cut(reg, str):
    """Return ``str`` with every match of the regex ``reg`` removed.

    The pattern is applied with ``re.S`` (``.`` also matches newlines) and
    ``re.M`` (``^``/``$`` match at every line boundary), so a single pattern
    can span several lines of the input text.

    Args:
        reg: Regular expression describing the fragments to delete.
        str: Text to clean. The name shadows the builtin inside this
            function; it is kept so existing keyword callers keep working.

    Returns:
        The text with all matching fragments replaced by the empty string.
    """
    pattern = re.compile(reg, flags=re.S | re.M)
    return pattern.sub('', str)

def main(url='https://mark-thinkpad.github.io/2019/01/20/sp/',
         out_path='example.md'):
    """Download a blog post and save its ``<article>`` body as Markdown.

    Args:
        url: Address of the page to fetch.
        out_path: Path of the Markdown file to (over)write.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        SystemExit: If the downloaded page contains no ``<article>`` element.
    """
    # Fetch the article. A timeout keeps the script from hanging forever on
    # an unresponsive server, and raise_for_status() stops us from converting
    # an error page into Markdown.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    article = soup.article
    if article is None:
        # str(None) would otherwise write the literal text "None" to the file.
        raise SystemExit('No <article> element found at {}'.format(url))
    art = str(article)

    # Strip HTML fragments that should not appear in the Markdown output.
    # NOTE: these patterns are non-greedy, so each match ends at the first
    # closing tag that follows the opening one.
    unwanted_patterns = (
        r'<time.*?</time>',
        r'<div class="metatag.*?</div>',
        r'<div class="prev-next">.*?</div>',
    )
    for pattern in unwanted_patterns:
        art = cut(pattern, art)

    # Convert to Markdown source and write it to the output file.
    md = html2text.html2text(art)
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.write(md)


if __name__ == "__main__":
    main()